/*
Robustness checks for the main event study estimates.
Produces results for Tables A1, A2, A3.

Variations (in the order they appear below):
- 2-digit industry weighting (vs. 1-digit baseline)
- weighting by sector x education
- using other parents as the control group
- removing controls whose last child was born <10 years before
- 50%+ ownership threshold
- 25%+ ownership threshold
- minimum firm age 3, 5, or 7 years
- ownership share event study
- alternative trimming thresholds (2.5/97.5 and 5/95)

*/

****************************************************************
****************************************************************
************** weighting by 2-digit industry
****************************************************************
****************************************************************

* ---------------------------------------------------------------
* Robustness: control-firm weights built on ind_cat2 and sic_2digit.
* Sample is restricted to male == 0 before stacking.
* Writes two files under the figpath global:
*   pretreat_means_2digit.csv
*   profit_before_self_trim_2digit_male0.csv
* ---------------------------------------------------------------
clear
set maxvar 30000

cd "$root"

* load the project programs called below
do "$root/code/admin/ado/stack_weight_panel.ado"
do "$root/code/admin/ado/trim_impute.ado"
do "$root/code/admin/ado/event_study.ado"

global figpath $root/estimates/admin/robustness

** first, set up the unstacked data
use data/admin/intermediate/penalty_cohort_final.dta, clear
keep if male == 0

global outcomes_raw ///
    operating_profits ///
    profit_before_self

* sic_2digit is kept here because it is one of the weighting variables
keep lnr lfirm year ///
    cohort_id year_first_child ///
    male age currently_own edlevel ///
    firm_age ind_cat* sic_2digit $outcomes_raw

* firm age used for matching, top-coded at 20
gen firm_age_match = firm_age
replace firm_age_match = 20 if firm_age > 20

* stack across cohorts and construct weights for the control firms
stack_weight_sample ind_cat2 sic_2digit, ///
    agevar(firm_age_match) ///
    min_event(-5) ///
    max_event(10) ///
    first_cohort(2002) ///
    last_cohort(2018) ///
    outcomes($outcomes_raw)

* trim variables and impute zeros; result is profit_before_self_trim
trim_impute profit_before_self, ///
    year_lb(1996) ///
    year_ub(2018) ///
    suffix(_trim) ///
    centile_lb(0.5) ///
    centile_ub(99.5)

* one-hot encode the covariate dummies
quietly {
    tab firm_age_match, gen(dfirmage)
    tab ind_cat2, gen(dind)
    tab age, gen(dage)
    tab edlevel, gen(dedl)
}

* weighted sample means in t-1 for the treated
* (treated and X_wt are not in the keep list above, so they are
*  presumably created by stack_weight_sample - confirm in the ado)
preserve
keep if treated == 1 & year == cohort_id - 1
collapse (mean) profit_before_self_trim [aweight=X_wt], by(male)
* path is quoted so that a root directory containing spaces still works
export delimited using "${figpath}/pretreat_means_2digit.csv", replace
restore

* event study, reported as percentages
aggregate_event_study profit_before_self_trim if male == 0, ///
    min_event(-5) ///
    max_event(10) ///
    x(dfirmage* dind* dage* dedl*) ///
    store(temp) ///
    percentage ///
    save("${figpath}/profit_before_self_trim_2digit_male0.csv")

cd "$root"

****************************************************************
****************************************************************
************** weighting by sector * education
****************************************************************
****************************************************************

* ---------------------------------------------------------------
* Robustness: control-firm weights built on ind_cat2 and edlevel.
* Sample is restricted to male == 0 before stacking.
* Writes two files under the figpath global:
*   pretreat_means_educ.csv
*   profit_before_self_trim_educ_male0.csv
* ---------------------------------------------------------------
clear
set maxvar 30000

cd "$root"

* load the project programs called below
do "$root/code/admin/ado/stack_weight_panel.ado"
do "$root/code/admin/ado/trim_impute.ado"
do "$root/code/admin/ado/event_study.ado"

global figpath $root/estimates/admin/robustness

** first, set up the unstacked data
use data/admin/intermediate/penalty_cohort_final.dta, clear
keep if male == 0

global outcomes_raw ///
    operating_profits ///
    profit_before_self

keep lnr lfirm year ///
    cohort_id year_first_child ///
    male age currently_own edlevel ///
    firm_age ind_cat* $outcomes_raw

* firm age used for matching, top-coded at 20
gen firm_age_match = firm_age
replace firm_age_match = 20 if firm_age > 20

* stack across cohorts and construct weights for the control firms
stack_weight_sample ind_cat2 edlevel, ///
    agevar(firm_age_match) ///
    min_event(-5) ///
    max_event(10) ///
    first_cohort(2002) ///
    last_cohort(2018) ///
    outcomes($outcomes_raw)

* trim variables and impute zeros; result is profit_before_self_trim
trim_impute profit_before_self, ///
    year_lb(1996) ///
    year_ub(2018) ///
    suffix(_trim) ///
    centile_lb(0.5) ///
    centile_ub(99.5)

* one-hot encode the covariate dummies
quietly {
    tab firm_age_match, gen(dfirmage)
    tab ind_cat2, gen(dind)
    tab age, gen(dage)
    tab edlevel, gen(dedl)
}

* weighted sample means in t-1 for the treated
* (treated and X_wt are not in the keep list above, so they are
*  presumably created by stack_weight_sample - confirm in the ado)
preserve
keep if treated == 1 & year == cohort_id - 1
collapse (mean) profit_before_self_trim [aweight=X_wt], by(male)
* path is quoted so that a root directory containing spaces still works
export delimited using "${figpath}/pretreat_means_educ.csv", replace
restore

* estimate for the outcomes where we want percentages
aggregate_event_study profit_before_self_trim if male == 0, ///
    min_event(-5) ///
    max_event(10) ///
    x(dfirmage* dind* dage* dedl*) ///
    store(temp) ///
    percentage ///
    save("${figpath}/profit_before_self_trim_educ_male0.csv")

cd "$root"


****************************************************************
****************************************************************
************** using other parents as controls
****************************************************************
****************************************************************

* ---------------------------------------------------------------
* Robustness: same pipeline as the ind_cat2 weighting, but
* stack_weight_sample is called with control("parents").
* Sample is restricted to male == 0 before stacking.
* Writes two files under the figpath global:
*   pretreat_means_parentcontrol.csv
*   profit_before_self_trim_parentcontrol_male0.csv
* ---------------------------------------------------------------
clear
set maxvar 30000

cd "$root"

* load the project programs called below
do "$root/code/admin/ado/stack_weight_panel.ado"
do "$root/code/admin/ado/trim_impute.ado"
do "$root/code/admin/ado/event_study.ado"

global figpath $root/estimates/admin/robustness

** first, set up the unstacked data
use data/admin/intermediate/penalty_cohort_final.dta, clear
keep if male == 0

global outcomes_raw ///
    operating_profits ///
    profit_before_self

keep lnr lfirm year ///
    cohort_id year_first_child ///
    male age currently_own edlevel ///
    firm_age ind_cat* $outcomes_raw

* firm age used for matching, top-coded at 20
gen firm_age_match = firm_age
replace firm_age_match = 20 if firm_age > 20

* stack across cohorts and construct weights for the control firms;
* the control() option picks the comparison group inside the ado
* (presumably other parents - confirm in stack_weight_panel.ado)
stack_weight_sample ind_cat2, ///
    agevar(firm_age_match) ///
    min_event(-5) ///
    max_event(10) ///
    first_cohort(2002) ///
    last_cohort(2018) ///
    outcomes($outcomes_raw) ///
    control("parents")

* trim variables and impute zeros; result is profit_before_self_trim
trim_impute profit_before_self, ///
    year_lb(1996) ///
    year_ub(2018) ///
    suffix(_trim) ///
    centile_lb(0.5) ///
    centile_ub(99.5)

* one-hot encode the covariate dummies
quietly {
    tab firm_age_match, gen(dfirmage)
    tab ind_cat2, gen(dind)
    tab age, gen(dage)
    tab edlevel, gen(dedl)
}

* weighted sample means in t-1 for the treated
preserve
keep if treated == 1 & year == cohort_id - 1
collapse (mean) profit_before_self_trim [aweight=X_wt], by(male)
* path is quoted so that a root directory containing spaces still works
export delimited using "${figpath}/pretreat_means_parentcontrol.csv", replace
restore

* no gender condition needed: only male == 0 observations were kept above
aggregate_event_study profit_before_self_trim, ///
    min_event(-5) ///
    max_event(10) ///
    x(dfirmage* dind* dage* dedl*) ///
    store(temp) ///
    percentage ///
    save("${figpath}/profit_before_self_trim_parentcontrol_male0.csv")

****************************************************************
****************************************************************
************** removing controls whose last child was born <10 years before
****************************************************************
****************************************************************

* ---------------------------------------------------------------
* Robustness: same pipeline as the ind_cat2 weighting, but
* stack_weight_sample is called with control("parents_m10").
* Sample is restricted to male == 0 before stacking.
* Writes two files under the figpath global:
*   pretreat_means_m10control.csv
*   profit_before_self_trim_m10control_male0.csv
* ---------------------------------------------------------------
clear
set maxvar 30000

cd "$root"

* load the project programs called below
do "$root/code/admin/ado/stack_weight_panel.ado"
do "$root/code/admin/ado/trim_impute.ado"
do "$root/code/admin/ado/event_study.ado"

global figpath $root/estimates/admin/robustness

** first, set up the unstacked data
use data/admin/intermediate/penalty_cohort_final.dta, clear
keep if male == 0

global outcomes_raw ///
    operating_profits ///
    profit_before_self

* year_last_child is kept in addition to the usual variables
* (presumably required by the parents_m10 control rule - confirm in the ado)
keep lnr lfirm year ///
    cohort_id year_first_child year_last_child ///
    male age currently_own edlevel ///
    firm_age ind_cat* $outcomes_raw

* firm age used for matching, top-coded at 20
gen firm_age_match = firm_age
replace firm_age_match = 20 if firm_age > 20

* stack across cohorts and construct weights for the control firms
stack_weight_sample ind_cat2, ///
    agevar(firm_age_match) ///
    min_event(-5) ///
    max_event(10) ///
    first_cohort(2002) ///
    last_cohort(2018) ///
    outcomes($outcomes_raw) ///
    control("parents_m10")

* trim variables and impute zeros; result is profit_before_self_trim
trim_impute profit_before_self, ///
    year_lb(1996) ///
    year_ub(2018) ///
    suffix(_trim) ///
    centile_lb(0.5) ///
    centile_ub(99.5)

* one-hot encode the covariate dummies
quietly {
    tab firm_age_match, gen(dfirmage)
    tab ind_cat2, gen(dind)
    tab age, gen(dage)
    tab edlevel, gen(dedl)
}

* weighted sample means in t-1 for the treated
preserve
keep if treated == 1 & year == cohort_id - 1
collapse (mean) profit_before_self_trim [aweight=X_wt], by(male)
* path is quoted so that a root directory containing spaces still works
export delimited using "${figpath}/pretreat_means_m10control.csv", replace
restore

* no gender condition needed: only male == 0 observations were kept above
aggregate_event_study profit_before_self_trim, ///
    min_event(-5) ///
    max_event(10) ///
    x(dfirmage* dind* dage* dedl*) ///
    store(temp) ///
    percentage ///
    save("${figpath}/profit_before_self_trim_m10control_male0.csv")

****************************************************************
****************************************************************
************** requiring 50%+ ownership
****************************************************************
****************************************************************

* ---------------------------------------------------------------
* Robustness: ownership flag redefined from owner_cat (50%+ rule).
* Both genders are kept and estimated separately.
* Writes three files under the figpath global:
*   pretreat_means_50plus.csv
*   profit_before_self_trim_50plus_male0.csv
*   profit_before_self_trim_50plus_male1.csv
* ---------------------------------------------------------------
clear
set maxvar 30000

cd "$root"

* load the project programs called below
do "$root/code/admin/ado/stack_weight_panel.ado"
do "$root/code/admin/ado/trim_impute.ado"
do "$root/code/admin/ado/event_study.ado"

global figpath $root/estimates/admin/robustness

** first, set up the unstacked data
use data/admin/intermediate/penalty_cohort_final.dta, clear

* overwrite the ownership indicator: 1 when owner_cat is 1 or 2, else 0
* (these categories are what impose the 50% requirement)
replace currently_own = inlist(owner_cat, 1, 2) // how ownership is inferred -> 50% req.

global outcomes_raw ///
    operating_profits ///
    profit_before_self

keep lnr lfirm year ///
    cohort_id year_first_child ///
    male age currently_own edlevel ///
    firm_age ind_cat* $outcomes_raw

* firm age used for matching, top-coded at 20
gen firm_age_match = firm_age
replace firm_age_match = 20 if firm_age > 20

* stack across cohorts and construct weights for the control firms
stack_weight_sample ind_cat2, ///
    agevar(firm_age_match) ///
    min_event(-5) ///
    max_event(10) ///
    first_cohort(2002) ///
    last_cohort(2018) ///
    outcomes($outcomes_raw)

* trim variables and impute zeros; result is profit_before_self_trim
trim_impute profit_before_self, ///
    year_lb(1996) ///
    year_ub(2018) ///
    suffix(_trim) ///
    centile_lb(0.5) ///
    centile_ub(99.5)

* one-hot encode the covariate dummies
quietly {
    tab firm_age_match, gen(dfirmage)
    tab ind_cat2, gen(dind)
    tab age, gen(dage)
    tab edlevel, gen(dedl)
}

* weighted sample means in t-1 for the treated, one row per gender
preserve
keep if treated == 1 & year == cohort_id - 1
collapse (mean) profit_before_self_trim [aweight=X_wt], by(male)
* path is quoted so that a root directory containing spaces still works
export delimited using "${figpath}/pretreat_means_50plus.csv", replace
restore

* one event study per gender: g = 0 first, then g = 1
forvalues g = 0/1 {
    aggregate_event_study profit_before_self_trim if male == `g', ///
        min_event(-5) ///
        max_event(10) ///
        x(dfirmage* dind* dage* dedl*) ///
        store(temp) ///
        percentage ///
        save("${figpath}/profit_before_self_trim_50plus_male`g'.csv")
}

****************************************************************
****************************************************************
************** requiring 25%+ ownership
****************************************************************
****************************************************************


* ---------------------------------------------------------------
* Robustness: 25%+ ownership sample, read from the _25p data file.
* Both genders are kept and estimated separately.
* Writes three files under the figpath global:
*   pretreat_means_25plus.csv
*   profit_before_self_trim_25plus_male0.csv
*   profit_before_self_trim_25plus_male1.csv
* ---------------------------------------------------------------
clear
set maxvar 30000

cd "$root"

* load the project programs called below
do "$root/code/admin/ado/stack_weight_panel.ado"
do "$root/code/admin/ado/trim_impute.ado"
do "$root/code/admin/ado/event_study.ado"

global figpath $root/estimates/admin/robustness

** first, set up the unstacked data
use data/admin/intermediate/penalty_cohort_final_25p.dta, clear // has 25%+

global outcomes_raw ///
    operating_profits ///
    profit_before_self

keep lnr lfirm year ///
    cohort_id year_first_child ///
    male age currently_own edlevel ///
    firm_age ind_cat* $outcomes_raw

* firm age used for matching, top-coded at 20
gen firm_age_match = firm_age
replace firm_age_match = 20 if firm_age > 20

* stack across cohorts and construct weights for the control firms
stack_weight_sample ind_cat2, ///
    agevar(firm_age_match) ///
    min_event(-5) ///
    max_event(10) ///
    first_cohort(2002) ///
    last_cohort(2018) ///
    outcomes($outcomes_raw)

* trim variables and impute zeros; result is profit_before_self_trim
trim_impute profit_before_self, ///
    year_lb(1996) ///
    year_ub(2018) ///
    suffix(_trim) ///
    centile_lb(0.5) ///
    centile_ub(99.5)

* one-hot encode the covariate dummies
quietly {
    tab firm_age_match, gen(dfirmage)
    tab ind_cat2, gen(dind)
    tab age, gen(dage)
    tab edlevel, gen(dedl)
}

* weighted sample means in t-1 for the treated, one row per gender
preserve
keep if treated == 1 & year == cohort_id - 1
collapse (mean) profit_before_self_trim [aweight=X_wt], by(male)
* path is quoted so that a root directory containing spaces still works
export delimited using "${figpath}/pretreat_means_25plus.csv", replace
restore

* one event study per gender: g = 0 first, then g = 1
forvalues g = 0/1 {
    aggregate_event_study profit_before_self_trim if male == `g', ///
        min_event(-5) ///
        max_event(10) ///
        x(dfirmage* dind* dage* dedl*) ///
        store(temp) ///
        percentage ///
        save("${figpath}/profit_before_self_trim_25plus_male`g'.csv")
}

****************************************************************
****************************************************************
************** requiring firm to be at least N years old
****************************************************************
****************************************************************

* ---------------------------------------------------------------
* Robustness: rerun the pipeline with min_age() set to 3, 5 and 7.
* Each pass rebuilds the stacked sample from the raw data file.
* For every value N, writes under the figpath global:
*   pretreat_means_minageN.csv
*   profit_before_self_trim_minageN_male0.csv
*   profit_before_self_trim_minageN_male1.csv
* ---------------------------------------------------------------
foreach minage in 3 5 7 {

    clear
    set maxvar 30000

    * reset the working directory on every pass: the data file below
    * is opened with a relative path
    cd "$root"
    do "$root/code/admin/ado/stack_weight_panel.ado"
    do "$root/code/admin/ado/trim_impute.ado"
    do "$root/code/admin/ado/event_study.ado"

    global figpath $root/estimates/admin/robustness

    ** first, set up the unstacked data
    use data/admin/intermediate/penalty_cohort_final.dta, clear

    global outcomes_raw ///
        operating_profits ///
        profit_before_self

    keep lnr lfirm year ///
        cohort_id year_first_child ///
        male age currently_own edlevel ///
        firm_age ind_cat* $outcomes_raw

    * firm age used for matching, top-coded at 20
    gen firm_age_match = firm_age
    replace firm_age_match = 20 if firm_age > 20

    * stack across cohorts and construct weights for the control firms,
    * passing the current minimum firm age through min_age()
    stack_weight_sample ind_cat2, ///
        agevar(firm_age_match) ///
        min_event(-5) ///
        max_event(10) ///
        first_cohort(2002) ///
        last_cohort(2018) ///
        outcomes($outcomes_raw) ///
        min_age(`minage')

    * trim variables and impute zeros
    trim_impute profit_before_self, ///
        year_lb(1996) ///
        year_ub(2018) ///
        suffix(_trim) ///
        centile_lb(0.5) ///
        centile_ub(99.5)

    * one-hot encode the covariate dummies
    quietly {
        tab firm_age_match, gen(dfirmage)
        tab ind_cat2, gen(dind)
        tab age, gen(dage)
        tab edlevel, gen(dedl)
    }

    * weighted sample means in t-1 for the treated, one row per gender
    preserve
    keep if treated == 1 & year == cohort_id - 1
    collapse (mean) profit_before_self_trim [aweight=X_wt], by(male)
    * path is quoted so that a root directory containing spaces still works
    export delimited using "${figpath}/pretreat_means_minage`minage'.csv", replace
    restore

    * one event study per gender
    forvalues g = 0/1 {
        aggregate_event_study profit_before_self_trim if male == `g', ///
            min_event(-5) ///
            max_event(10) ///
            x(dfirmage* dind* dage* dedl*) ///
            store(temp) ///
            percentage ///
            save("${figpath}/profit_before_self_trim_minage`minage'_male`g'.csv")
    }

}

****************************************************************
****************************************************************
************** ownership percentage
****************************************************************
****************************************************************

* ---------------------------------------------------------------
* Event study with ownership_pct as the outcome.
* Cohorts run from 2006 to 2018 and no trimming step is applied.
* Results are saved separately for male == 0 and male == 1.
* ---------------------------------------------------------------
clear
set maxvar 30000

cd "$root"
do "$root/code/admin/ado/stack_weight_panel.ado"
do "$root/code/admin/ado/trim_impute.ado"
do "$root/code/admin/ado/event_study.ado"

global figpath $root/estimates/admin/robustness

* load the unstacked panel
use data/admin/intermediate/penalty_cohort_final.dta, clear

* raw outcomes carried through the stacking step
global outcomes_raw operating_profits profit_before_self ownership_pct

keep lnr lfirm year cohort_id year_first_child ///
    male age currently_own edlevel firm_age ind_cat* $outcomes_raw

* matching variable: firm age capped at 20
gen firm_age_match = firm_age
replace firm_age_match = 20 if firm_age > 20

* build the stacked sample and the control-firm weights
stack_weight_sample ind_cat2, agevar(firm_age_match) ///
    min_event(-5) max_event(10) ///
    first_cohort(2006) last_cohort(2018) ///
    outcomes($outcomes_raw)

* indicator variables for each covariate category
quietly {
    tab firm_age_match, gen(dfirmage)
    tab ind_cat2, gen(dind)
    tab age, gen(dage)
    tab edlevel, gen(dedl)
}

* run the estimator once per gender, women first
forvalues sex = 0/1 {
    aggregate_event_study ownership_pct if male == `sex', ///
        min_event(-5) max_event(10) ///
        x(dfirmage* dind* dage* dedl*) ///
        store(temp) ///
        percentage ///
        save("${figpath}/ownership_pct_0618_male`sex'.csv")
}

****************************************************************
****************************************************************
************** Alternative trimming thresholds
****************************************************************
****************************************************************

* ---------------------------------------------------------------
* Robustness: rerun the estimates under two alternative trimming
* rules, labelled 25 (centiles 2.5 and 97.5) and 5 (centiles 5 and 95).
* Each pass rebuilds the stacked sample from the raw data file.
* For every label T, writes under the figpath global:
*   pretreat_means_trimT.csv
*   one ATT file per outcome and gender, with suffix _trimT
* ---------------------------------------------------------------
cd "$root"


global figpath $root/estimates/admin/robustness

* define trimming thresholds: label, lower centile, upper centile
* (the bounds are looked up below by pasting the label into the macro name)
local trim_labels  25 5
local trim_lb_25   2.5
local trim_ub_25   97.5
local trim_lb_5    5
local trim_ub_5    95

foreach t of local trim_labels {

    clear
    set maxvar 30000

    * reset the working directory on every pass: the data file below
    * is opened with a relative path
    cd "$root"
    do "$root/code/admin/ado/stack_weight_panel.ado"
    do "$root/code/admin/ado/trim_impute.ado"
    do "$root/code/admin/ado/event_study.ado"


    ** first, set up the unstacked data
    use data/admin/intermediate/penalty_cohort_final.dta, clear

    global outcomes_raw ///
        operating_profits ///
        profit_before_self ///
        revenues ///
        n_workers_total ///
        costs_before_self ///
        owner_salaries ///
        n_emp_excl_owners ///
        value_added

    keep lnr lfirm year ///
        cohort_id year_first_child ///
        male age currently_own edlevel ///
        firm_age ind_cat* $outcomes_raw

    * firm age used for matching, top-coded at 20
    gen firm_age_match = firm_age
    replace firm_age_match = 20 if firm_age > 20

    * stack across cohorts and construct weights for the control firms
    stack_weight_sample ind_cat2, ///
        agevar(firm_age_match) ///
        min_event(-5) ///
        max_event(10) ///
        first_cohort(2002) ///
        last_cohort(2018) ///
        outcomes($outcomes_raw)

    * floor negative revenues and owner salaries at zero
    replace revenues = 0 if revenues < 0
    replace owner_salaries = 0 if owner_salaries < 0

    * generate transformations of revenues, salaries
    * (the logs are missing wherever the argument is not positive)
    gen pos_rev = (revenues > 0 & revenues != .)
    gen log_revenues = log(revenues)
    gen log_costs = log(costs_before_self)
    gen log_emp = log(n_workers_total)

    * trim variables and impute zeros
    trim_impute profit_before_self, ///
        year_lb(1996) ///
        year_ub(2018) ///
        suffix(_trim) ///
        centile_lb(`trim_lb_`t'') ///
        centile_ub(`trim_ub_`t'')

    * NOTE(review): value_added uses year_ub(2019) while every other
    * trim_impute call in this file uses 2018 - confirm this is intended
    trim_impute value_added, ///
        year_lb(1996) ///
        year_ub(2019) ///
        suffix(_trim) ///
        centile_lb(`trim_lb_`t'') ///
        centile_ub(`trim_ub_`t'')

    * the next two outcomes are trimmed at the upper centile only
    trim_impute owner_salaries, ///
        year_lb(1996) ///
        year_ub(2018) ///
        suffix(_trim) ///
        centile_lb(0) ///
        centile_ub(`trim_ub_`t'')

    trim_impute n_emp_excl_owners, ///
        year_lb(1996) ///
        year_ub(2018) ///
        suffix(_trim) ///
        centile_lb(0) ///
        centile_ub(`trim_ub_`t'')

    * one-hot encode the covariate dummies
    quietly {
        tab firm_age_match, gen(dfirmage)
        tab ind_cat2, gen(dind)
        tab age, gen(dage)
        tab edlevel, gen(dedl)
    }

    * weighted sample means in t-1 for the treated, one row per gender
    preserve
    keep if treated == 1 & year == cohort_id - 1
    collapse (mean) profit_before_self_trim ///
        owner_salaries_trim ///
        log_revenues ///
        log_costs ///
        log_emp ///
        n_emp_excl_owners_trim ///
        pos_rev ///
        value_added_trim ///
        [aweight=X_wt], by(male)
    * path is quoted so that a root directory containing spaces still works
    export delimited using "${figpath}/pretreat_means_trim`t'.csv", replace
    restore

    * estimate for the outcomes where we want percentages
    foreach Y of varlist profit_before_self_trim ///
        owner_salaries_trim ///
        value_added_trim {

        forvalues g = 0/1 {
            aggregate_event_study `Y' if male == `g', ///
                min_event(-5) ///
                max_event(10) ///
                x(dfirmage* dind* dage* dedl*) ///
                store(temp) ///
                percentage ///
                save("${figpath}/`Y'_ATT_male`g'_trim`t'.csv")
        }

        cd "$root"

    }

    * estimate when we don't want percentages
    foreach Y of varlist n_emp_excl_owners_trim {

        forvalues g = 0/1 {
            aggregate_event_study `Y' if male == `g', ///
                min_event(-5) ///
                max_event(10) ///
                x(dfirmage* dind* dage* dedl*) ///
                store(temp) ///
                save("${figpath}/`Y'_ATT_male`g'_trim`t'.csv")
        }

        cd "$root"

    }

}

